#importing necessary packages

require(quanteda)
require(quanteda.textmodels)
require(quanteda.textplots)
library("quanteda")
library('quanteda.textmodels')
library('quanteda.textplots')
library("readtext")
library(tm)
library(NLP)
library(ggplot2)
library(readtext)

## Loading data and creating the corpus

# Read the CSV; the "TEXT" column supplies the text of each document.
# NOTE(review): the remaining columns (e.g. Party, used further down) are
# presumably carried along as document variables -- confirm against the CSV.
dat <- readtext("53_legislatura.csv", text_field = "TEXT", encoding = "utf-8")

# Build the corpus once and reuse it for both summaries, instead of
# constructing it a second time just for the preview.
mycorpus <- corpus(dat)
summary(mycorpus, 5) # preview restricted to the first 5 documents
summary(mycorpus) # summary with the default number of documents

## Creating the document-feature matrix + data pre-processing

# Tokenise explicitly before building the dfm: passing a corpus (and token
# options such as remove_punct) straight to dfm() is deprecated in quanteda 3
# and no longer supported in quanteda 4.
my_dfm <- dfm(tokens(mycorpus, remove_punct = TRUE))

# Remove Portuguese stopwords (exact, non-glob matching).
my_dfm <- dfm_remove(
  my_dfm,
  pattern = stopwords("portuguese"),
  valuetype = "fixed"
)
# Keep only features with at least 3 characters.
my_dfm <- dfm_select(my_dfm, min_nchar = 3)
# Keep only features occurring at least 10 times overall.
my_dfm <- dfm_trim(my_dfm, min_termfreq = 10)

# Collapse documents by party. `Party` is resolved among the document
# variables -- presumably a column of the input CSV; verify it exists.
my_dfm <- dfm_group(my_dfm, groups = Party)

## Wordfish scaling

# Fit the Wordfish model on the grouped dfm. `dir` gives the indexes of two
# documents used to fix the direction of the scale.
# NOTE(review): indexes 2 and 1 depend on the document order after grouping --
# confirm they point at the intended parties.
tmod_wf <- textmodel_wordfish(x = my_dfm, dir = c(2, 1))

# Estimated positions and feature scores.
summary(object = tmod_wf)

# One-dimensional plot of the estimated document (party) positions.
textplot_scale1d(x = tmod_wf)

#textplot_scale1d(tmod_wf, margin = "features") #plotting words
